library(tidyverse)
library(ggplot2)
library(lavaan)
library(car)
library(glmnet)
library(randomForestSRC)
library(caret)
library(ggRandomForests)
library(VSURF)VSURF
Data set
This data set is from the 2015 Asian American Quality of Life survey. Participants are from Austin, Texas.
Input data set
# Load the survey CSV and prepare its categorical columns for modelling.
# Every character column is converted to a factor first, so the relevel()
# calls below operate on factors.
qol <- read_csv("AAQoL.csv") |> mutate(across(where(is.character), ~as.factor(.x))) |>
# Pick the reference (baseline) level for four categorical predictors; model
# coefficients for these variables are then contrasts against that level.
mutate(`English Difficulties`=relevel(`English Difficulties`,ref="Not at all"),
`English Speaking`=relevel(`English Speaking`,ref="Not at all"),
Ethnicity = relevel(Ethnicity,ref="Chinese"),
Religion=relevel(Religion,ref="None")) |>
# Collapse the eight income brackets into two groups: brackets up to $59,999
# map to "Below", $60,000 and over map to "Above".
# NOTE(review): the column name suggests the cut is the sample median income;
# confirm against the data.
mutate(Income_median = case_match(Income,"$0 - $9,999"~"Below",
"$10,000 - $19,999" ~"Below",
"$20,000 - $29,999"~"Below",
"$30,000 - $39,999"~"Below",
"$40,000 - $49,999"~"Below",
"$50,000 - $59,999"~"Below",
"$60,000 - $69,999"~"Above",
"$70,000 and over"~"Above",
# Any other Income value is passed through unchanged here ...
.default=Income)) |>
# ... and becomes NA in this step, because only "Below" and "Above" are
# declared as levels ("Below" is the first, i.e. reference, level).
mutate(Income_median = factor(Income_median, levels=c("Below","Above"))) |>
# Use "Strongly disagree" as the reference level for every column in the two
# column ranges `Family Respect`..`Togetherness` and
# `Close-knit Community`..`Community Trust`.
mutate(across(c(`Family Respect`:`Togetherness`,`Close-knit Community`:`Community Trust`),~relevel(.x,ref="Strongly disagree")))New names:
Rows: 2609 Columns: 231
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(190): Gender, Ethnicity, Marital Status, No One, Spouse, Children, Gran... dbl
(41): Survey ID, Age, Education Completed, Household Size, Grandparent,...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `Other` -> `Other...17`
• `Other` -> `Other...89`
qol |> DT::datatable()Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
Family
# Analysis frame for the `Family` outcome.
# Keeps the outcome plus the candidate predictors, restricts the outcome to
# "No"/"Yes" answers (unused factor levels are dropped), removes every row
# with a missing value, and converts column names to syntactic R names via
# make.names() so they can be used in model formulas.
rfdata <- qol |>
  select(
    Family,
    Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median,
    `English Speaking`, `English Difficulties`,
    `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  filter(Family %in% c("No", "Yes")) |>
  mutate(Family = droplevels(Family)) |>
  na.omit() |>
  as.data.frame() |>
  rename_with(make.names)

# ROSE-resampled version of rfdata; the seed is fixed so the resampling is
# reproducible.
imbal <- ROSE::ROSE(Family ~ ., data = rfdata, seed = 3)$data
# Run VSURF variable selection for Family against all other columns of the
# ROSE-resampled `imbal`, in parallel and without progress output, and store
# the result in vsurf.mod.
# The warning emitted here says the selected variables are indices into the
# formula-based input matrix; later code maps them to names through
# names(rfdata[,-1]) -- assumes `imbal` keeps rfdata's predictor column
# order, TODO confirm.
# NOTE(review): prefer TRUE/FALSE over T/F and `<-` over right-assignment `->`.
VSURF(Family~.,imbal,na.action="na.omit",parallel=T,verbose=F)->vsurf.modWarning in VSURF.formula(Family ~ ., imbal, na.action = "na.omit", parallel = T, : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
vsurf.mod |> summary()
VSURF computation time: 1.1 mins
VSURF selected:
34 variables at thresholding step (in 8.5 secs)
33 variables at interpretation step (in 7.4 secs)
16 variables at prediction step (in 47.1 secs)
VSURF ran in parallel on a PSOCK cluster and used 15 cores
names(rfdata[,-1])[vsurf.mod$varselect.pred] [1] "Ethnicity" "Age" "English.Difficulties"
[4] "Full.Time.Employment" "Helpful.Community" "Religion"
[7] "Get.Along" "Expression" "Religious.Importance"
[10] "English.Speaking" "Community.Trust" "Religious.Attendance"
[13] "Family.Respect" "Close.knit.Community" "Successful.Family"
[16] "Loyalty"
names(rfdata[,-1])[vsurf.mod$varselect.interp] [1] "Ethnicity" "Age"
[3] "English.Difficulties" "Full.Time.Employment"
[5] "Helpful.Community" "Religion"
[7] "See.Friends" "Discrimination"
[9] "Get.Along" "Helpful.Family"
[11] "Expression" "Religious.Importance"
[13] "Helpful.Friends" "English.Speaking"
[15] "Community.Trust" "See.Family"
[17] "Religious.Attendance" "Close.Family"
[19] "Community.Shares.Values" "Family.Respect"
[21] "Close.knit.Community" "Successful.Family"
[23] "Feel.Close" "Close.Friends"
[25] "Similar.Values" "Loyalty"
[27] "Spend.Time.Together" "Trust"
[29] "Togetherness" "Family.Pride"
[31] "Gender" "Dental.Insurance"
[33] "Income_median"
plot(vsurf.mod)
vsurf.mod$mean.perf[1] 0.2337747
Importance
# Variable-importance table for the current VSURF fit: predictor names are
# looked up through VSURF's importance ordering index (imp.mean.dec.ind) and
# paired with the mean and standard deviation of each variable's importance.
# names(rfdata)[-1] removes the response (first column) from the name vector
# directly; the previous names(rfdata[, -1]) subsetted the data frame, which
# collapses to an unnamed vector when only one predictor column remains.
vi <- data.frame(
  Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
  Importance = vsurf.mod$imp.mean.dec,
  sd_Importance = vsurf.mod$imp.sd.dec
)
vi |> mutate(across(Importance:sd_Importance,~round(.x,5))) Variable Importance sd_Importance
1 Ethnicity 0.03111 0.00084
2 Age 0.01877 0.00053
3 English.Difficulties 0.01638 0.00072
4 Full.Time.Employment 0.01598 0.00078
5 Helpful.Community 0.01584 0.00086
6 Religion 0.01454 0.00067
7 See.Friends 0.01239 0.00062
8 Discrimination 0.01230 0.00064
9 Get.Along 0.01030 0.00057
10 Helpful.Family 0.00993 0.00059
11 Expression 0.00910 0.00071
12 Religious.Importance 0.00887 0.00043
13 Helpful.Friends 0.00864 0.00055
14 English.Speaking 0.00848 0.00054
15 Community.Trust 0.00848 0.00062
16 See.Family 0.00818 0.00061
17 Religious.Attendance 0.00803 0.00043
18 Close.Family 0.00789 0.00063
19 Community.Shares.Values 0.00765 0.00028
20 Family.Respect 0.00679 0.00067
21 Close.knit.Community 0.00678 0.00032
22 Successful.Family 0.00649 0.00035
23 Feel.Close 0.00576 0.00033
24 Close.Friends 0.00543 0.00073
25 Similar.Values 0.00465 0.00035
26 Loyalty 0.00464 0.00043
27 Spend.Time.Together 0.00458 0.00036
28 Trust 0.00457 0.00035
29 Togetherness 0.00436 0.00037
30 Family.Pride 0.00329 0.00028
31 Gender 0.00310 0.00028
32 Dental.Insurance 0.00284 0.00028
33 Income_median 0.00238 0.00023
34 Health.Insurance 0.00163 0.00015
Logistic regression (Interpretation)
# Interpretation model: logistic regression of Family on the predictors that
# VSURF kept at its prediction step (varselect.pred).
# The model is fitted on rfdata, not on the ROSE-resampled `imbal` that was
# passed to VSURF.
# all_of() makes the selection by character vector explicit, and
# names(rfdata)[-1] avoids the drop-to-vector hazard of names(rfdata[, -1])
# when only one predictor column remains.
lr <- rfdata |>
  select(Family, all_of(names(rfdata)[-1][vsurf.mod$varselect.pred]))
lr_mod <- glm(Family ~ ., family = binomial, data = lr)
summary(lr_mod)
Call:
glm(formula = Family ~ ., family = binomial, data = lr)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.462345 0.621522 0.744 0.456942
EthnicityAsian Indian -0.187291 0.292473 -0.640 0.521930
EthnicityFilipino -0.355945 0.222263 -1.601 0.109276
EthnicityKorean -0.682400 0.166699 -4.094 4.25e-05
EthnicityOther -0.746636 0.246686 -3.027 0.002473
EthnicityVietnamese -0.669960 0.182331 -3.674 0.000238
Age -0.014485 0.003323 -4.359 1.31e-05
English.DifficultiesMuch 0.497108 0.163062 3.049 0.002299
English.DifficultiesNot much 0.062462 0.150904 0.414 0.678932
English.DifficultiesVery much -0.224421 0.146238 -1.535 0.124876
Full.Time.EmploymentEmployed full time -0.417455 0.100582 -4.150 3.32e-05
Helpful.CommunityAgree 1.094653 0.555671 1.970 0.048842
Helpful.CommunityDisagree 1.442807 0.551394 2.617 0.008880
Helpful.CommunityNeutral 1.115089 0.546111 2.042 0.041164
Helpful.CommunityStrongly agree 1.286134 0.596826 2.155 0.031165
ReligionBuddhist 0.164853 0.213445 0.772 0.439910
ReligionCatholic 0.052290 0.229066 0.228 0.819432
ReligionHindu -0.290346 0.323946 -0.896 0.370103
ReligionMuslim -0.261756 0.403336 -0.649 0.516353
ReligionOther -0.329418 0.417405 -0.789 0.429993
ReligionProtestant -0.070350 0.215719 -0.326 0.744335
Get.AlongAgree 0.455990 0.580675 0.785 0.432292
Get.AlongDisagree 0.424073 0.561892 0.755 0.450415
Get.AlongNeutral 0.308745 0.569997 0.542 0.588052
Get.AlongStrongly agree 0.470386 0.636725 0.739 0.460054
ExpressionSomewhat agree 0.155592 0.375439 0.414 0.678561
ExpressionSomewhat disagree 0.366586 0.377331 0.972 0.331288
ExpressionStrongly agree -0.030690 0.391770 -0.078 0.937561
Religious.ImportanceNot very important 0.100784 0.199292 0.506 0.613061
Religious.ImportanceSomewhat important -0.004287 0.216124 -0.020 0.984173
Religious.ImportanceVery important -0.172505 0.233551 -0.739 0.460138
English.SpeakingNot well -0.060861 0.250295 -0.243 0.807883
English.SpeakingVery well 0.130977 0.261275 0.501 0.616159
English.SpeakingWell -0.105323 0.249102 -0.423 0.672433
Community.TrustAgree -0.105091 0.462910 -0.227 0.820407
Community.TrustDisagree -0.110171 0.441164 -0.250 0.802798
Community.TrustNeutral -0.163027 0.450399 -0.362 0.717381
Community.TrustStrongly agree -0.029100 0.535987 -0.054 0.956703
Religious.AttendanceNever -0.338172 0.189725 -1.782 0.074678
Religious.AttendanceOnce or twice a month -0.010424 0.175749 -0.059 0.952704
Religious.AttendanceSeldom -0.282600 0.183881 -1.537 0.124327
Family.RespectSomewhat agree -0.495115 0.559476 -0.885 0.376177
Family.RespectSomewhat disagree -0.576795 0.555232 -1.039 0.298881
Family.RespectStrongly agree -0.349201 0.569423 -0.613 0.539709
Close.knit.CommunityAgree -0.640598 0.411912 -1.555 0.119903
Close.knit.CommunityDisagree -0.744618 0.409129 -1.820 0.068758
Close.knit.CommunityNeutral -0.666998 0.402707 -1.656 0.097664
Close.knit.CommunityStrongly agree -0.837243 0.458592 -1.826 0.067898
Successful.FamilySomewhat agree 0.694712 0.581141 1.195 0.231920
Successful.FamilySomewhat disagree 0.571446 0.582572 0.981 0.326641
Successful.FamilyStrongly agree 1.004185 0.596320 1.684 0.092188
LoyaltySomewhat agree -0.230806 0.644665 -0.358 0.720325
LoyaltySomewhat disagree 0.017210 0.644853 0.027 0.978709
LoyaltyStrongly agree -0.280903 0.654361 -0.429 0.667720
(Intercept)
EthnicityAsian Indian
EthnicityFilipino
EthnicityKorean ***
EthnicityOther **
EthnicityVietnamese ***
Age ***
English.DifficultiesMuch **
English.DifficultiesNot much
English.DifficultiesVery much
Full.Time.EmploymentEmployed full time ***
Helpful.CommunityAgree *
Helpful.CommunityDisagree **
Helpful.CommunityNeutral *
Helpful.CommunityStrongly agree *
ReligionBuddhist
ReligionCatholic
ReligionHindu
ReligionMuslim
ReligionOther
ReligionProtestant
Get.AlongAgree
Get.AlongDisagree
Get.AlongNeutral
Get.AlongStrongly agree
ExpressionSomewhat agree
ExpressionSomewhat disagree
ExpressionStrongly agree
Religious.ImportanceNot very important
Religious.ImportanceSomewhat important
Religious.ImportanceVery important
English.SpeakingNot well
English.SpeakingVery well
English.SpeakingWell
Community.TrustAgree
Community.TrustDisagree
Community.TrustNeutral
Community.TrustStrongly agree
Religious.AttendanceNever .
Religious.AttendanceOnce or twice a month
Religious.AttendanceSeldom
Family.RespectSomewhat agree
Family.RespectSomewhat disagree
Family.RespectStrongly agree
Close.knit.CommunityAgree
Close.knit.CommunityDisagree .
Close.knit.CommunityNeutral .
Close.knit.CommunityStrongly agree .
Successful.FamilySomewhat agree
Successful.FamilySomewhat disagree
Successful.FamilyStrongly agree .
LoyaltySomewhat agree
LoyaltySomewhat disagree
LoyaltyStrongly agree
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2667.5 on 1925 degrees of freedom
Residual deviance: 2522.2 on 1872 degrees of freedom
AIC: 2630.2
Number of Fisher Scoring iterations: 4
car::Anova(lr_mod)Analysis of Deviance Table (Type II tests)
Response: Family
LR Chisq Df Pr(>Chisq)
Ethnicity 26.0657 5 8.666e-05 ***
Age 19.1896 1 1.184e-05 ***
English.Difficulties 19.9895 3 0.0001706 ***
Full.Time.Employment 17.3306 1 3.141e-05 ***
Helpful.Community 8.4105 4 0.0776476 .
Religion 3.3316 6 0.7662166
Get.Along 1.6094 4 0.8071070
Expression 3.5196 3 0.3182353
Religious.Importance 2.6403 3 0.4504635
English.Speaking 3.0835 3 0.3789242
Community.Trust 0.3619 4 0.9854745
Religious.Attendance 5.6145 3 0.1319463
Family.Respect 2.0952 3 0.5528927
Close.knit.Community 4.1280 4 0.3889646
Successful.Family 5.4185 3 0.1435970
Loyalty 1.0368 3 0.7923476
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
broom::tidy(lr_mod,exponentiate=T,conf.int=T) |> DT::datatable()Health Professionals
# Analysis frame for the `Heal Professionals` outcome: outcome plus candidate
# predictors, rows with any missing value removed, and column names converted
# to syntactic R names via make.names() for use in formulas.
rfdata <- qol |>
  select(
    `Heal Professionals`,
    Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median,
    `English Speaking`, `English Difficulties`,
    `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  rename_with(make.names)

# ROSE-resampled version of rfdata; the seed is fixed so the resampling is
# reproducible.
imbal <- ROSE::ROSE(Heal.Professionals ~ ., data = rfdata, seed = 3)$data
# Run VSURF variable selection for Heal.Professionals against all other
# columns of the ROSE-resampled `imbal`, in parallel and without progress
# output, and store the result in vsurf.mod.
# The warning emitted here says the selected variables are indices into the
# formula-based input matrix; later code maps them to names through
# names(rfdata[,-1]) -- assumes `imbal` keeps rfdata's predictor column
# order, TODO confirm.
# NOTE(review): prefer TRUE/FALSE over T/F and `<-` over right-assignment `->`.
VSURF(Heal.Professionals~.,imbal,na.action="na.omit",parallel=T,verbose=F)->vsurf.modWarning in VSURF.formula(Heal.Professionals ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
vsurf.mod |> summary()
VSURF computation time: 22.3 secs
VSURF selected:
34 variables at thresholding step (in 8.4 secs)
14 variables at interpretation step (in 6.8 secs)
2 variables at prediction step (in 7.2 secs)
VSURF ran in parallel on a PSOCK cluster and used 15 cores
names(rfdata[,-1])[vsurf.mod$varselect.pred][1] "English.Speaking" "Dental.Insurance"
names(rfdata[,-1])[vsurf.mod$varselect.interp] [1] "English.Speaking" "Ethnicity" "Dental.Insurance"
[4] "Religion" "English.Difficulties" "Health.Insurance"
[7] "Community.Trust" "Religious.Attendance" "Get.Along"
[10] "Helpful.Friends" "Helpful.Community" "See.Family"
[13] "Close.knit.Community" "Income_median"
plot(vsurf.mod)
vsurf.mod$mean.perf[1] 0.1991697
Importance
# Variable-importance table for the current VSURF fit: predictor names are
# looked up through VSURF's importance ordering index (imp.mean.dec.ind) and
# paired with the mean and standard deviation of each variable's importance.
# names(rfdata)[-1] removes the response (first column) from the name vector
# directly; the previous names(rfdata[, -1]) subsetted the data frame, which
# collapses to an unnamed vector when only one predictor column remains.
vi <- data.frame(
  Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
  Importance = vsurf.mod$imp.mean.dec,
  sd_Importance = vsurf.mod$imp.sd.dec
)
vi |> mutate(across(Importance:sd_Importance,~round(.x,5))) Variable Importance sd_Importance
1 English.Speaking 0.03496 0.00138
2 Ethnicity 0.03188 0.00101
3 Dental.Insurance 0.03173 0.00116
4 Religion 0.02632 0.00066
5 English.Difficulties 0.02255 0.00106
6 Health.Insurance 0.01514 0.00065
7 Community.Trust 0.01460 0.00052
8 Religious.Attendance 0.01350 0.00054
9 Get.Along 0.01327 0.00054
10 Helpful.Friends 0.01202 0.00050
11 Helpful.Community 0.01201 0.00061
12 See.Family 0.01171 0.00050
13 Close.knit.Community 0.01094 0.00040
14 Income_median 0.01079 0.00053
15 See.Friends 0.01062 0.00043
16 Community.Shares.Values 0.00995 0.00047
17 Spend.Time.Together 0.00886 0.00056
18 Religious.Importance 0.00883 0.00052
19 Helpful.Family 0.00768 0.00064
20 Close.Family 0.00755 0.00052
21 Close.Friends 0.00660 0.00050
22 Age 0.00646 0.00036
23 Discrimination 0.00532 0.00040
24 Family.Respect 0.00529 0.00035
25 Successful.Family 0.00522 0.00033
26 Family.Pride 0.00516 0.00035
27 Feel.Close 0.00494 0.00035
28 Expression 0.00491 0.00031
29 Trust 0.00449 0.00029
30 Loyalty 0.00431 0.00035
31 Similar.Values 0.00406 0.00036
32 Togetherness 0.00371 0.00021
33 Gender 0.00317 0.00024
34 Full.Time.Employment 0.00300 0.00029
Logistic regression (Interpretation)
# Interpretation model: logistic regression of Heal.Professionals on the
# predictors that VSURF kept at its prediction step (varselect.pred).
# The model is fitted on rfdata, not on the ROSE-resampled `imbal` that was
# passed to VSURF.
# all_of() makes the selection by character vector explicit, and
# names(rfdata)[-1] avoids the drop-to-vector hazard of names(rfdata[, -1])
# when only one predictor column remains.
lr <- rfdata |>
  select(
    Heal.Professionals,
    all_of(names(rfdata)[-1][vsurf.mod$varselect.pred])
  )
lr_mod <- glm(Heal.Professionals ~ ., family = binomial, data = lr)
summary(lr_mod)
Call:
glm(formula = Heal.Professionals ~ ., family = binomial, data = lr)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.0810 0.2293 -4.714 2.43e-06 ***
English.SpeakingNot well 0.3027 0.2488 1.217 0.22363
English.SpeakingVery well 1.2561 0.2448 5.132 2.87e-07 ***
English.SpeakingWell 0.8592 0.2444 3.515 0.00044 ***
Dental.InsuranceYes 0.5217 0.1007 5.180 2.22e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2668.3 on 1926 degrees of freedom
Residual deviance: 2530.9 on 1922 degrees of freedom
AIC: 2540.9
Number of Fisher Scoring iterations: 4
car::Anova(lr_mod)Analysis of Deviance Table (Type II tests)
Response: Heal.Professionals
LR Chisq Df Pr(>Chisq)
English.Speaking 70.887 3 2.756e-15 ***
Dental.Insurance 26.820 1 2.234e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
broom::tidy(lr_mod,exponentiate=T,conf.int=T)|> DT::datatable()Physical Check-up
# install.packages("randomForestSRC")

# Analysis frame for the `Physical Check-up` outcome: outcome plus candidate
# predictors, rows with any missing value removed, three long predictor names
# shortened, and all column names converted to syntactic R names via
# make.names() for use in formulas.
# The chain uses the native pipe throughout (a stray magrittr `%>%` was
# replaced for consistency with the rest of the script).
rfdata <- qol |>
  select(
    `Physical Check-up`,
    Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median,
    `English Speaking`, `English Difficulties`,
    `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  rename(
    Employment = `Full Time Employment`,
    EnglishSpeak = `English Speaking`,
    EnglishDiff = `English Difficulties`
  ) |>
  as.data.frame() |>
  rename_with(make.names)

# ROSE-resampled version of rfdata; the seed is fixed so the resampling is
# reproducible.
imbal <- ROSE::ROSE(Physical.Check.up ~ ., data = rfdata, seed = 3)$data
# Run VSURF variable selection for Physical.Check.up against all other
# columns of the ROSE-resampled `imbal`, in parallel and without progress
# output, and store the result in vsurf.mod.
# The warning emitted here says the selected variables are indices into the
# formula-based input matrix; later code maps them to names through
# names(rfdata[,-1]) -- assumes `imbal` keeps rfdata's predictor column
# order, TODO confirm.
# NOTE(review): prefer TRUE/FALSE over T/F and `<-` over right-assignment `->`.
VSURF(Physical.Check.up~.,imbal,na.action="na.omit",parallel=T,verbose=F)->vsurf.modWarning in VSURF.formula(Physical.Check.up ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
vsurf.mod |> summary()
VSURF computation time: 23.2 secs
VSURF selected:
34 variables at thresholding step (in 8.2 secs)
15 variables at interpretation step (in 6.5 secs)
3 variables at prediction step (in 8.5 secs)
VSURF ran in parallel on a PSOCK cluster and used 15 cores
names(rfdata[,-1])[vsurf.mod$varselect.pred][1] "Dental.Insurance" "Health.Insurance" "Age"
names(rfdata[,-1])[vsurf.mod$varselect.interp] [1] "Dental.Insurance" "Health.Insurance"
[3] "Age" "Ethnicity"
[5] "Religion" "Income_median"
[7] "EnglishDiff" "Close.knit.Community"
[9] "EnglishSpeak" "Community.Trust"
[11] "Get.Along" "Community.Shares.Values"
[13] "Religious.Attendance" "Helpful.Community"
[15] "Gender"
plot(vsurf.mod)
vsurf.mod$mean.perf[1] 0.149244
Importance
# Variable-importance table for the current VSURF fit: predictor names are
# looked up through VSURF's importance ordering index (imp.mean.dec.ind) and
# paired with the mean and standard deviation of each variable's importance.
# names(rfdata)[-1] removes the response (first column) from the name vector
# directly; the previous names(rfdata[, -1]) subsetted the data frame, which
# collapses to an unnamed vector when only one predictor column remains.
vi <- data.frame(
  Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
  Importance = vsurf.mod$imp.mean.dec,
  sd_Importance = vsurf.mod$imp.sd.dec
)
vi |> mutate(across(Importance:sd_Importance,~round(.x,5))) Variable Importance sd_Importance
1 Dental.Insurance 0.06225 0.00146
2 Health.Insurance 0.04407 0.00118
3 Age 0.04351 0.00102
4 Ethnicity 0.03320 0.00091
5 Religion 0.02402 0.00069
6 Income_median 0.02351 0.00132
7 EnglishDiff 0.02089 0.00080
8 Close.knit.Community 0.01636 0.00058
9 EnglishSpeak 0.01566 0.00069
10 Community.Trust 0.01557 0.00059
11 Get.Along 0.01520 0.00061
12 Community.Shares.Values 0.01495 0.00072
13 Religious.Attendance 0.01167 0.00040
14 Helpful.Community 0.01131 0.00043
15 Gender 0.01114 0.00057
16 Discrimination 0.01055 0.00058
17 Expression 0.00986 0.00049
18 Religious.Importance 0.00914 0.00041
19 See.Family 0.00851 0.00054
20 Successful.Family 0.00845 0.00042
21 Helpful.Family 0.00780 0.00053
22 Close.Friends 0.00778 0.00040
23 Close.Family 0.00748 0.00055
24 See.Friends 0.00733 0.00046
25 Spend.Time.Together 0.00717 0.00046
26 Helpful.Friends 0.00663 0.00056
27 Similar.Values 0.00657 0.00027
28 Family.Respect 0.00623 0.00029
29 Family.Pride 0.00548 0.00022
30 Feel.Close 0.00538 0.00029
31 Loyalty 0.00513 0.00023
32 Trust 0.00467 0.00038
33 Employment 0.00449 0.00030
34 Togetherness 0.00429 0.00029
Logistic regression (Interpretation)
# Interpretation model: logistic regression of Physical.Check.up on the
# predictors that VSURF kept at its prediction step (varselect.pred).
# The model is fitted on rfdata, not on the ROSE-resampled `imbal` that was
# passed to VSURF.
# all_of() makes the selection by character vector explicit, and
# names(rfdata)[-1] avoids the drop-to-vector hazard of names(rfdata[, -1])
# when only one predictor column remains.
lr <- rfdata |>
  select(
    Physical.Check.up,
    all_of(names(rfdata)[-1][vsurf.mod$varselect.pred])
  )
lr_mod <- glm(Physical.Check.up ~ ., family = binomial, data = lr)
summary(lr_mod)
Call:
glm(formula = Physical.Check.up ~ ., family = binomial, data = lr)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.927796 0.204799 -9.413 < 2e-16 ***
Dental.InsuranceYes 0.884275 0.115918 7.628 2.38e-14 ***
Health.InsuranceYes 1.195771 0.159916 7.477 7.58e-14 ***
Age 0.028965 0.003435 8.434 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2405.0 on 1917 degrees of freedom
Residual deviance: 2148.8 on 1914 degrees of freedom
AIC: 2156.8
Number of Fisher Scoring iterations: 4
car::Anova(lr_mod)Analysis of Deviance Table (Type II tests)
Response: Physical.Check.up
LR Chisq Df Pr(>Chisq)
Dental.Insurance 57.892 1 2.769e-14 ***
Health.Insurance 58.248 1 2.311e-14 ***
Age 76.835 1 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
broom::tidy(lr_mod,exponentiate=T,conf.int=T)|> DT::datatable()Dental Check-up
# Analysis frame for the `Dentist Check-up` outcome: outcome plus candidate
# predictors, rows with any missing value removed, and column names converted
# to syntactic R names via make.names() for use in formulas.
rfdata <- qol |>
  select(
    `Dentist Check-up`,
    Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median,
    `English Speaking`, `English Difficulties`,
    `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  rename_with(make.names)

# ROSE-resampled version of rfdata; the seed is fixed so the resampling is
# reproducible.
imbal <- ROSE::ROSE(Dentist.Check.up ~ ., data = rfdata, seed = 3)$data
# Run VSURF variable selection for Dentist.Check.up against all other
# columns of the ROSE-resampled `imbal`, in parallel and without progress
# output, and store the result in vsurf.mod.
# The warning emitted here says the selected variables are indices into the
# formula-based input matrix; later code maps them to names through
# names(rfdata[,-1]) -- assumes `imbal` keeps rfdata's predictor column
# order, TODO confirm.
# NOTE(review): prefer TRUE/FALSE over T/F and `<-` over right-assignment `->`.
VSURF(Dentist.Check.up~.,imbal,na.action="na.omit",parallel=T,verbose=F)->vsurf.modWarning in VSURF.formula(Dentist.Check.up ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
vsurf.mod |> summary()
VSURF computation time: 26.4 secs
VSURF selected:
34 variables at thresholding step (in 8 secs)
16 variables at interpretation step (in 6.4 secs)
10 variables at prediction step (in 12 secs)
VSURF ran in parallel on a PSOCK cluster and used 15 cores
names(rfdata[,-1])[vsurf.mod$varselect.pred] [1] "Dental.Insurance" "Ethnicity"
[3] "English.Speaking" "Community.Trust"
[5] "See.Family" "See.Friends"
[7] "Community.Shares.Values" "Close.knit.Community"
[9] "Get.Along" "Religious.Attendance"
names(rfdata[,-1])[vsurf.mod$varselect.interp] [1] "Dental.Insurance" "Ethnicity"
[3] "Religion" "Income_median"
[5] "Age" "Health.Insurance"
[7] "Religious.Importance" "English.Difficulties"
[9] "English.Speaking" "Community.Trust"
[11] "See.Family" "See.Friends"
[13] "Community.Shares.Values" "Close.knit.Community"
[15] "Get.Along" "Religious.Attendance"
plot(vsurf.mod)
vsurf.mod$mean.perf[1] 0.1689817
Importance
# Variable-importance table for the current VSURF fit: predictor names are
# looked up through VSURF's importance ordering index (imp.mean.dec.ind) and
# paired with the mean and standard deviation of each variable's importance.
# names(rfdata)[-1] removes the response (first column) from the name vector
# directly; the previous names(rfdata[, -1]) subsetted the data frame, which
# collapses to an unnamed vector when only one predictor column remains.
vi <- data.frame(
  Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
  Importance = vsurf.mod$imp.mean.dec,
  sd_Importance = vsurf.mod$imp.sd.dec
)
vi |> mutate(across(Importance:sd_Importance,~round(.x,5))) Variable Importance sd_Importance
1 Dental.Insurance 0.07975 0.00124
2 Ethnicity 0.03747 0.00087
3 Religion 0.03295 0.00094
4 Income_median 0.02204 0.00065
5 Age 0.01849 0.00052
6 Health.Insurance 0.01589 0.00069
7 Religious.Importance 0.01535 0.00067
8 English.Difficulties 0.01519 0.00048
9 English.Speaking 0.01454 0.00050
10 Community.Trust 0.01400 0.00060
11 See.Family 0.01361 0.00059
12 See.Friends 0.01179 0.00059
13 Community.Shares.Values 0.00951 0.00046
14 Close.knit.Community 0.00930 0.00055
15 Get.Along 0.00908 0.00051
16 Religious.Attendance 0.00853 0.00039
17 Close.Family 0.00810 0.00058
18 Discrimination 0.00794 0.00038
19 Helpful.Community 0.00787 0.00050
20 Gender 0.00746 0.00069
21 Close.Friends 0.00731 0.00044
22 Spend.Time.Together 0.00727 0.00041
23 Feel.Close 0.00721 0.00044
24 Helpful.Family 0.00662 0.00055
25 Helpful.Friends 0.00641 0.00039
26 Expression 0.00562 0.00034
27 Similar.Values 0.00548 0.00037
28 Successful.Family 0.00512 0.00035
29 Trust 0.00423 0.00029
30 Full.Time.Employment 0.00407 0.00040
31 Family.Pride 0.00366 0.00033
32 Family.Respect 0.00359 0.00032
33 Togetherness 0.00322 0.00025
34 Loyalty 0.00270 0.00022
Logistic regression (Interpretation)
# Interpretation model: logistic regression of Dentist.Check.up on the
# predictors that VSURF kept at its prediction step (varselect.pred).
# The model is fitted on rfdata, not on the ROSE-resampled `imbal` that was
# passed to VSURF.
# all_of() makes the selection by character vector explicit, and
# names(rfdata)[-1] avoids the drop-to-vector hazard of names(rfdata[, -1])
# when only one predictor column remains.
lr <- rfdata |>
  select(
    Dentist.Check.up,
    all_of(names(rfdata)[-1][vsurf.mod$varselect.pred])
  )
lr_mod <- glm(Dentist.Check.up ~ ., family = binomial, data = lr)
summary(lr_mod)
Call:
glm(formula = Dentist.Check.up ~ ., family = binomial, data = lr)
Coefficients:
Estimate Std. Error z value
(Intercept) -1.606e+00 4.957e-01 -3.239
Dental.InsuranceYes 1.515e+00 1.110e-01 13.642
EthnicityAsian Indian -1.409e+00 1.767e-01 -7.974
EthnicityFilipino -3.099e-01 2.213e-01 -1.400
EthnicityKorean -4.581e-01 1.703e-01 -2.690
EthnicityOther -4.781e-01 2.546e-01 -1.878
EthnicityVietnamese -1.360e-01 1.756e-01 -0.774
English.SpeakingNot well 9.322e-01 2.691e-01 3.464
English.SpeakingVery well 1.354e+00 2.755e-01 4.916
English.SpeakingWell 1.022e+00 2.695e-01 3.791
Community.TrustAgree 1.561e-01 4.744e-01 0.329
Community.TrustDisagree -1.679e-01 4.497e-01 -0.373
Community.TrustNeutral 6.964e-02 4.578e-01 0.152
Community.TrustStrongly agree -3.030e-02 5.712e-01 -0.053
See.Family 1.170e-01 4.406e-02 2.655
See.Friends -5.455e-06 4.291e-02 0.000
Community.Shares.ValuesAgree 4.233e-01 5.454e-01 0.776
Community.Shares.ValuesDisagree 2.318e-01 5.392e-01 0.430
Community.Shares.ValuesNeutral 2.772e-01 5.351e-01 0.518
Community.Shares.ValuesStrongly agree 1.346e-01 6.164e-01 0.218
Close.knit.CommunityAgree -2.303e-01 3.875e-01 -0.594
Close.knit.CommunityDisagree 5.051e-02 3.917e-01 0.129
Close.knit.CommunityNeutral -2.009e-01 3.776e-01 -0.532
Close.knit.CommunityStrongly agree 7.670e-01 4.499e-01 1.705
Get.AlongAgree -3.445e-02 6.921e-01 -0.050
Get.AlongDisagree -2.498e-02 6.663e-01 -0.037
Get.AlongNeutral 3.748e-02 6.809e-01 0.055
Get.AlongStrongly agree -5.985e-01 7.565e-01 -0.791
Religious.AttendanceNever -1.558e-01 1.860e-01 -0.837
Religious.AttendanceOnce or twice a month -6.317e-03 1.792e-01 -0.035
Religious.AttendanceSeldom -1.515e-01 1.954e-01 -0.776
Pr(>|z|)
(Intercept) 0.001197 **
Dental.InsuranceYes < 2e-16 ***
EthnicityAsian Indian 1.53e-15 ***
EthnicityFilipino 0.161515
EthnicityKorean 0.007139 **
EthnicityOther 0.060366 .
EthnicityVietnamese 0.438694
English.SpeakingNot well 0.000531 ***
English.SpeakingVery well 8.85e-07 ***
English.SpeakingWell 0.000150 ***
Community.TrustAgree 0.742042
Community.TrustDisagree 0.708798
Community.TrustNeutral 0.879093
Community.TrustStrongly agree 0.957705
See.Family 0.007928 **
See.Friends 0.999899
Community.Shares.ValuesAgree 0.437676
Community.Shares.ValuesDisagree 0.667225
Community.Shares.ValuesNeutral 0.604424
Community.Shares.ValuesStrongly agree 0.827147
Close.knit.CommunityAgree 0.552317
Close.knit.CommunityDisagree 0.897407
Close.knit.CommunityNeutral 0.594612
Close.knit.CommunityStrongly agree 0.088228 .
Get.AlongAgree 0.960305
Get.AlongDisagree 0.970094
Get.AlongNeutral 0.956103
Get.AlongStrongly agree 0.428840
Religious.AttendanceNever 0.402459
Religious.AttendanceOnce or twice a month 0.971883
Religious.AttendanceSeldom 0.437988
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2593.0 on 1914 degrees of freedom
Residual deviance: 2199.3 on 1884 degrees of freedom
AIC: 2261.3
Number of Fisher Scoring iterations: 4
car::Anova(lr_mod)Analysis of Deviance Table (Type II tests)
Response: Dentist.Check.up
LR Chisq Df Pr(>Chisq)
Dental.Insurance 197.515 1 < 2.2e-16 ***
Ethnicity 82.944 5 < 2.2e-16 ***
English.Speaking 27.657 3 4.286e-06 ***
Community.Trust 2.021 4 0.731859
See.Family 7.067 1 0.007850 **
See.Friends 0.000 1 0.999899
Community.Shares.Values 1.964 4 0.742462
Close.knit.Community 16.557 4 0.002356 **
Get.Along 3.327 4 0.504609
Religious.Attendance 1.826 3 0.609189
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
broom::tidy(lr_mod,exponentiate=T,conf.int=T)|> DT::datatable()Folkmedicine
# Analysis frame for the `Folkmedicine` outcome: outcome plus candidate
# predictors, rows with any missing value removed, and column names converted
# to syntactic R names via make.names() for use in formulas.
rfdata <- qol |>
  select(
    `Folkmedicine`,
    Ethnicity, Age, Gender, Religion,
    `Full Time Employment`, Income_median,
    `English Speaking`, `English Difficulties`,
    `See Family`:`Community Trust`,
    `Health Insurance`, `Dental Insurance`, `Discrimination`
  ) |>
  na.omit() |>
  as.data.frame() |>
  rename_with(make.names)

# ROSE-resampled version of rfdata; the seed is fixed so the resampling is
# reproducible.
imbal <- ROSE::ROSE(Folkmedicine ~ ., data = rfdata, seed = 3)$data
# Run VSURF variable selection for Folkmedicine against all other columns of
# the ROSE-resampled `imbal`, in parallel and without progress output, and
# store the result in vsurf.mod.
# The warning emitted here says the selected variables are indices into the
# formula-based input matrix; later code maps them to names through
# names(rfdata[,-1]) -- assumes `imbal` keeps rfdata's predictor column
# order, TODO confirm.
# NOTE(review): prefer TRUE/FALSE over T/F and `<-` over right-assignment `->`.
VSURF(Folkmedicine~.,imbal,na.action="na.omit",parallel=T,verbose=F)->vsurf.modWarning in VSURF.formula(Folkmedicine ~ ., imbal, na.action = "na.omit", : VSURF with a formula-type call outputs selected variables
which are indices of the input matrix based on the formula:
you may reorder these to get indices of the original data
vsurf.mod |> summary()
VSURF computation time: 57.7 secs
VSURF selected:
34 variables at thresholding step (in 7.6 secs)
33 variables at interpretation step (in 7.1 secs)
20 variables at prediction step (in 43 secs)
VSURF ran in parallel on a PSOCK cluster and used 15 cores
names(rfdata[,-1])[vsurf.mod$varselect.pred] [1] "Ethnicity" "Age"
[3] "Religion" "Full.Time.Employment"
[5] "Religious.Attendance" "English.Difficulties"
[7] "Get.Along" "Religious.Importance"
[9] "Helpful.Community" "Community.Shares.Values"
[11] "Community.Trust" "Close.knit.Community"
[13] "Feel.Close" "Similar.Values"
[15] "Togetherness" "Gender"
[17] "Family.Respect" "Loyalty"
[19] "Income_median" "Dental.Insurance"
names(rfdata[,-1])[vsurf.mod$varselect.interp] [1] "Ethnicity" "Age"
[3] "Religion" "English.Speaking"
[5] "Full.Time.Employment" "Religious.Attendance"
[7] "English.Difficulties" "Get.Along"
[9] "Religious.Importance" "Helpful.Community"
[11] "Discrimination" "Community.Shares.Values"
[13] "Community.Trust" "Close.knit.Community"
[15] "See.Family" "Feel.Close"
[17] "Helpful.Friends" "Close.Family"
[19] "See.Friends" "Helpful.Family"
[21] "Similar.Values" "Togetherness"
[23] "Expression" "Spend.Time.Together"
[25] "Close.Friends" "Gender"
[27] "Trust" "Successful.Family"
[29] "Family.Pride" "Family.Respect"
[31] "Loyalty" "Income_median"
[33] "Dental.Insurance"
plot(vsurf.mod)
vsurf.mod$mean.perf[1] 0.112059
Importance
# Variable-importance table for the current VSURF fit: predictor names are
# looked up through VSURF's importance ordering index (imp.mean.dec.ind) and
# paired with the mean and standard deviation of each variable's importance.
# names(rfdata)[-1] removes the response (first column) from the name vector
# directly; the previous names(rfdata[, -1]) subsetted the data frame, which
# collapses to an unnamed vector when only one predictor column remains.
vi <- data.frame(
  Variable = names(rfdata)[-1][vsurf.mod$imp.mean.dec.ind],
  Importance = vsurf.mod$imp.mean.dec,
  sd_Importance = vsurf.mod$imp.sd.dec
)
vi |> mutate(across(Importance:sd_Importance,~round(.x,5))) Variable Importance sd_Importance
1 Ethnicity 0.07012 0.00168
2 Age 0.04766 0.00106
3 Religion 0.04361 0.00080
4 English.Speaking 0.03855 0.00136
5 Full.Time.Employment 0.03009 0.00155
6 Religious.Attendance 0.02845 0.00118
7 English.Difficulties 0.02418 0.00075
8 Get.Along 0.02316 0.00063
9 Religious.Importance 0.02066 0.00079
10 Helpful.Community 0.01931 0.00097
11 Discrimination 0.01919 0.00090
12 Community.Shares.Values 0.01862 0.00066
13 Community.Trust 0.01835 0.00059
14 Close.knit.Community 0.01702 0.00054
15 See.Family 0.01410 0.00039
16 Feel.Close 0.01380 0.00082
17 Helpful.Friends 0.01350 0.00060
18 Close.Family 0.01290 0.00042
19 See.Friends 0.01254 0.00058
20 Helpful.Family 0.01245 0.00055
21 Similar.Values 0.01168 0.00038
22 Togetherness 0.01091 0.00051
23 Expression 0.01085 0.00041
24 Spend.Time.Together 0.00979 0.00037
25 Close.Friends 0.00962 0.00031
26 Gender 0.00920 0.00051
27 Trust 0.00888 0.00038
28 Successful.Family 0.00844 0.00041
29 Family.Pride 0.00814 0.00039
30 Family.Respect 0.00783 0.00043
31 Loyalty 0.00683 0.00041
32 Income_median 0.00661 0.00033
33 Dental.Insurance 0.00514 0.00041
34 Health.Insurance 0.00301 0.00022
Logistic regression (Interpretation)
# Interpretation model: logistic regression of Folkmedicine on the predictors
# that VSURF kept at its prediction step (varselect.pred).
# The model is fitted on rfdata, not on the ROSE-resampled `imbal` that was
# passed to VSURF.
# all_of() makes the selection by character vector explicit, and
# names(rfdata)[-1] avoids the drop-to-vector hazard of names(rfdata[, -1])
# when only one predictor column remains.
lr <- rfdata |>
  select(
    Folkmedicine,
    all_of(names(rfdata)[-1][vsurf.mod$varselect.pred])
  )
lr_mod <- glm(Folkmedicine ~ ., family = binomial, data = lr)
summary(lr_mod)
Call:
glm(formula = Folkmedicine ~ ., family = binomial, data = lr)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.640291 0.896661 -2.945 0.00323
EthnicityAsian Indian -0.153457 0.460432 -0.333 0.73892
EthnicityFilipino -0.817383 0.354354 -2.307 0.02107
EthnicityKorean 0.061646 0.216011 0.285 0.77535
EthnicityOther -0.755186 0.386572 -1.954 0.05076
EthnicityVietnamese -1.337963 0.296822 -4.508 6.56e-06
Age 0.026520 0.004552 5.827 5.66e-09
ReligionBuddhist 0.439710 0.317670 1.384 0.16631
ReligionCatholic 0.011301 0.363164 0.031 0.97518
ReligionHindu -0.400501 0.538575 -0.744 0.45710
ReligionMuslim -2.475080 1.135925 -2.179 0.02934
ReligionOther 0.419359 0.623588 0.672 0.50127
ReligionProtestant 0.074212 0.322011 0.230 0.81773
Full.Time.EmploymentEmployed full time -0.254735 0.162907 -1.564 0.11789
Religious.AttendanceNever 0.049044 0.313793 0.156 0.87580
Religious.AttendanceOnce or twice a month 0.292082 0.289355 1.009 0.31277
Religious.AttendanceSeldom 0.202232 0.308119 0.656 0.51160
English.DifficultiesMuch 0.043201 0.214955 0.201 0.84072
English.DifficultiesNot much 0.077021 0.192937 0.399 0.68974
English.DifficultiesVery much -0.113785 0.235636 -0.483 0.62918
Get.AlongAgree 0.922242 1.007104 0.916 0.35981
Get.AlongDisagree 0.514194 0.980497 0.524 0.59999
Get.AlongNeutral 1.291197 0.990589 1.303 0.19242
Get.AlongStrongly agree 1.091672 1.090678 1.001 0.31687
Religious.ImportanceNot very important -0.045192 0.303786 -0.149 0.88174
Religious.ImportanceSomewhat important -0.084846 0.344392 -0.246 0.80540
Religious.ImportanceVery important 0.040328 0.366743 0.110 0.91244
Helpful.CommunityAgree 0.142683 0.823359 0.173 0.86242
Helpful.CommunityDisagree 0.677331 0.794198 0.853 0.39374
Helpful.CommunityNeutral 0.089187 0.807354 0.110 0.91204
Helpful.CommunityStrongly agree -0.254427 0.916498 -0.278 0.78131
Community.Shares.ValuesAgree -0.014955 0.790263 -0.019 0.98490
Community.Shares.ValuesDisagree 0.169678 0.776478 0.219 0.82702
Community.Shares.ValuesNeutral 0.242902 0.776683 0.313 0.75448
Community.Shares.ValuesStrongly agree -0.777929 0.938196 -0.829 0.40701
Community.TrustAgree -1.030154 0.574986 -1.792 0.07319
Community.TrustDisagree -1.071120 0.531727 -2.014 0.04397
Community.TrustNeutral -1.507299 0.550992 -2.736 0.00623
Community.TrustStrongly agree -0.218153 0.728461 -0.299 0.76458
Close.knit.CommunityAgree -0.471825 0.514065 -0.918 0.35871
Close.knit.CommunityDisagree -0.757531 0.506246 -1.496 0.13456
Close.knit.CommunityNeutral -0.451102 0.490450 -0.920 0.35769
Close.knit.CommunityStrongly agree -0.460142 0.622897 -0.739 0.46008
Feel.CloseSomewhat agree -1.016763 0.798333 -1.274 0.20280
Feel.CloseSomewhat disagree -1.131442 0.807353 -1.401 0.16109
Feel.CloseStrongly agree -1.309587 0.821546 -1.594 0.11092
Similar.ValuesSomewhat agree 0.110678 0.698945 0.158 0.87418
Similar.ValuesSomewhat disagree -0.125115 0.705137 -0.177 0.85917
Similar.ValuesStrongly agree 0.236325 0.718582 0.329 0.74225
TogethernessSomewhat agree 1.529338 1.056668 1.447 0.14781
TogethernessSomewhat disagree 1.823154 1.050015 1.736 0.08251
TogethernessStrongly agree 1.719991 1.065960 1.614 0.10662
GenderMale -0.184717 0.156435 -1.181 0.23769
Family.RespectSomewhat agree -1.286228 0.740486 -1.737 0.08239
Family.RespectSomewhat disagree -1.318913 0.755416 -1.746 0.08082
Family.RespectStrongly agree -1.299311 0.760823 -1.708 0.08768
LoyaltySomewhat agree 0.776312 1.058637 0.733 0.46337
LoyaltySomewhat disagree 1.367005 1.051909 1.300 0.19376
LoyaltyStrongly agree 0.656261 1.070174 0.613 0.53973
Income_medianAbove 0.007527 0.162203 0.046 0.96299
Dental.InsuranceYes 0.381994 0.169205 2.258 0.02397
(Intercept) **
EthnicityAsian Indian
EthnicityFilipino *
EthnicityKorean
EthnicityOther .
EthnicityVietnamese ***
Age ***
ReligionBuddhist
ReligionCatholic
ReligionHindu
ReligionMuslim *
ReligionOther
ReligionProtestant
Full.Time.EmploymentEmployed full time
Religious.AttendanceNever
Religious.AttendanceOnce or twice a month
Religious.AttendanceSeldom
English.DifficultiesMuch
English.DifficultiesNot much
English.DifficultiesVery much
Get.AlongAgree
Get.AlongDisagree
Get.AlongNeutral
Get.AlongStrongly agree
Religious.ImportanceNot very important
Religious.ImportanceSomewhat important
Religious.ImportanceVery important
Helpful.CommunityAgree
Helpful.CommunityDisagree
Helpful.CommunityNeutral
Helpful.CommunityStrongly agree
Community.Shares.ValuesAgree
Community.Shares.ValuesDisagree
Community.Shares.ValuesNeutral
Community.Shares.ValuesStrongly agree
Community.TrustAgree .
Community.TrustDisagree *
Community.TrustNeutral **
Community.TrustStrongly agree
Close.knit.CommunityAgree
Close.knit.CommunityDisagree
Close.knit.CommunityNeutral
Close.knit.CommunityStrongly agree
Feel.CloseSomewhat agree
Feel.CloseSomewhat disagree
Feel.CloseStrongly agree
Similar.ValuesSomewhat agree
Similar.ValuesSomewhat disagree
Similar.ValuesStrongly agree
TogethernessSomewhat agree
TogethernessSomewhat disagree .
TogethernessStrongly agree
GenderMale
Family.RespectSomewhat agree .
Family.RespectSomewhat disagree .
Family.RespectStrongly agree .
LoyaltySomewhat agree
LoyaltySomewhat disagree
LoyaltyStrongly agree
Income_medianAbove
Dental.InsuranceYes *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1505.5 on 1898 degrees of freedom
Residual deviance: 1330.0 on 1838 degrees of freedom
AIC: 1452
Number of Fisher Scoring iterations: 6
car::Anova(lr_mod)
Analysis of Deviance Table (Type II tests)
Response: Folkmedicine
LR Chisq Df Pr(>Chisq)
Ethnicity 29.911 5 1.536e-05 ***
Age 34.423 1 4.435e-09 ***
Religion 13.192 6 0.040093 *
Full.Time.Employment 2.463 1 0.116546
Religious.Attendance 1.536 3 0.674059
English.Difficulties 0.698 3 0.873790
Get.Along 8.026 4 0.090615 .
Religious.Importance 0.404 3 0.939369
Helpful.Community 4.465 4 0.346696
Community.Shares.Values 4.195 4 0.380204
Community.Trust 13.505 4 0.009056 **
Close.knit.Community 2.616 4 0.624005
Feel.Close 3.395 3 0.334665
Similar.Values 1.150 3 0.764943
Togetherness 4.097 3 0.251217
Gender 1.401 1 0.236517
Family.Respect 3.029 3 0.387116
Loyalty 3.890 3 0.273603
Income_median 0.002 1 0.962988
Dental.Insurance 5.171 1 0.022963 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tidy the fitted binomial model `lr_mod` into one row per term, with the
# coefficients and their confidence limits exponentiated (odds ratios,
# assuming the default logit link), and show the result as an interactive
# table. `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change these arguments.
broom::tidy(lr_mod, exponentiate = TRUE, conf.int = TRUE) |>
  DT::datatable()